Introduction

Some review of the subject and the list of hypotheses discussed at meetings.

Table 1

# Table 1: descriptive summary of the columns in `diamonds`, rendered via
# kableone(). `data = .` is needed because the first positional argument of
# CreateTableOne() is `vars`, not the data frame.
diamonds %>%
  tableone::CreateTableOne(
    data = .,
    includeNA = TRUE, # treat NA as a regular level of categorical variables
    # strata = "visit",
    addOverall = TRUE # only takes effect once `strata` is supplied
  ) %>%
  tableone::kableone()
Overall
n 53940
carat (mean (SD)) 0.80 (0.47)
cut (%)
Fair 1610 ( 3.0)
Good 4906 ( 9.1)
Very Good 12082 (22.4)
Premium 13791 (25.6)
Ideal 21551 (40.0)
color (%)
D 6775 (12.6)
E 9797 (18.2)
F 9542 (17.7)
G 11292 (20.9)
H 8304 (15.4)
I 5422 (10.1)
J 2808 ( 5.2)
clarity (%)
I1 741 ( 1.4)
SI2 9194 (17.0)
SI1 13065 (24.2)
VS2 12258 (22.7)
VS1 8171 (15.1)
VVS2 5066 ( 9.4)
VVS1 3655 ( 6.8)
IF 1790 ( 3.3)
depth (mean (SD)) 61.75 (1.43)
table (mean (SD)) 57.46 (2.23)
price (mean (SD)) 3932.80 (3989.44)
x (mean (SD)) 5.73 (1.12)
y (mean (SD)) 5.73 (1.14)
z (mean (SD)) 3.54 (0.71)

Basic descriptive characteristics

Distribution of price by color

# Overlaid density curves of `price`, one per level of `color`.
# The labels describe the variables actually plotted (price and color); the
# previous "Age" / "Male x Female" wording did not match the data shown.
diamonds %>%
  ggplot(aes(x = price, fill = color)) +
  geom_density(alpha = 0.3) + # semi-transparent so overlapping curves stay visible
  labs(
    title = "Price density by color",
    x = "Price",
    y = "Density"
  ) +
  theme_linedraw()

Missing values, categories and distributions in one picture

# Overview plot of the `diamonds` data set drawn with tabplot's tableplot().
tabplot::tableplot(diamonds)
Missings, categories and distributions

Missings, categories and distributions

Are any variables correlated?

# Scatterplot matrix of the numeric columns with Pearson correlations.
# `is_numeric` was a deprecated purrr helper that is no longer available in
# purrr 1.0.0+; the base predicate `is.numeric` selects the same columns and
# matches the boxplot chunk below.
diamonds %>%
  select(where(is.numeric)) %>%
  psych::pairs.panels(
    method = "pearson", # correlation method
    hist.col = "#00AFBB",
    density = TRUE, # show density plots
    ellipses = TRUE # show correlation ellipses
  )

More info on distribution with boxplots

# Horizontal boxplots, one per numeric column.
# The numeric columns are stacked into long format (`ind` = column name,
# `values` = observation) so a single geom_boxplot() call draws them all.
# pivot_longer() replaces the superseded gather(); the former
# scale_fill_grey() was dropped because no `fill` aesthetic is mapped, so it
# had no effect on the plot.
diamonds %>%
  select(where(is.numeric)) %>%
  pivot_longer(
    cols = everything(),
    names_to = "ind",
    values_to = "values"
  ) %>%
  ggplot(aes(x = ind, y = values)) +
  geom_boxplot() +
  coord_flip() + # column names on the vertical axis for readability
  theme_minimal()

System information

# Print the project metadata object; the output below lists its `config`,
# `packages` and `helpers` entries.
# NOTE(review): presumably created by ProjectTemplate when the project is
# loaded -- confirm against the setup chunk.
project.info
$config
$config$version
[1] "0.10.1"

$config$data_loading
[1] TRUE

$config$data_loading_header
[1] TRUE

$config$data_ignore
[1] ""

$config$cache_loading
[1] TRUE

$config$recursive_loading
[1] FALSE

$config$munging
[1] TRUE

$config$logging
[1] FALSE

$config$logging_level
[1] "INFO"

$config$load_libraries
[1] TRUE

$config$libraries
[1] "dtplyr"

$config$as_factors
[1] FALSE

$config$tables_type
[1] "data.table"

$config$attach_internal_libraries
[1] FALSE

$config$cache_loaded_data
[1] TRUE

$config$sticky_variables
[1] "NONE"

$config$underscore_variables
[1] TRUE

$config$cache_file_format
[1] "RData"


$packages
[1] "dtplyr"

$helpers
[1] "pclean.R"
0.10.1
INFO
dtplyr
data.table
NONE
RData
dtplyr
pclean.R
# Record the R version, platform, locale and attached/loaded package versions
# so the report can be reproduced.
sessionInfo()
R version 4.1.2 (2021-11-01)
Platform: x86_64-solus-linux-gnu (64-bit)
Running under: Solus 4.3 Fortitude

Matrix products: default
BLAS/LAPACK: /usr/lib64/haswell/libopenblas_haswellp-r0.3.18.so

locale:
 [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C               LC_TIME=pt_BR.UTF-8        LC_COLLATE=en_US.UTF-8    
 [5] LC_MONETARY=pt_BR.UTF-8    LC_MESSAGES=en_US.UTF-8    LC_PAPER=pt_BR.UTF-8       LC_NAME=C                 
 [9] LC_ADDRESS=C               LC_TELEPHONE=C             LC_MEASUREMENT=pt_BR.UTF-8 LC_IDENTIFICATION=C       

attached base packages:
[1] stats     graphics  grDevices utils     datasets  methods   base     

other attached packages:
 [1] forcats_0.5.1   stringr_1.4.0   dplyr_1.0.7     purrr_0.3.4     readr_2.0.0     tidyr_1.1.3     tibble_3.1.3   
 [8] tidyverse_1.3.1 ggplot2_3.3.5   tabplot_1.4.1   ffbase_0.13.3   ff_4.0.4        bit_4.0.4       dtplyr_1.1.0   
[15] rmarkdown_2.9   nvimcom_0.9-122

loaded via a namespace (and not attached):
 [1] nlme_3.1-153           fs_1.5.0               lubridate_1.7.10       httr_1.4.2             rprojroot_2.0.2       
 [6] tools_4.1.2            backports_1.2.1        bslib_0.2.5.1          utf8_1.2.2             R6_2.5.0              
[11] DBI_1.1.1              colorspace_2.0-2       raster_3.4-13          withr_2.4.2            sp_1.4-5              
[16] tidyselect_1.1.1       mnormt_2.0.2           compiler_4.1.2         cli_3.0.1              rvest_1.0.1           
[21] xml2_1.3.2             ProjectTemplate_0.10.1 labeling_0.4.2         sass_0.4.0             scales_1.1.1          
[26] psych_2.1.6            proxy_0.4-26           digest_0.6.27          base64enc_0.1-3        pkgconfig_2.0.3       
[31] htmltools_0.5.1.1      labelled_2.8.0         highr_0.9              dbplyr_2.1.1           rlang_0.4.11          
[36] readxl_1.3.1           rstudioapi_0.13        jquerylib_0.1.4        generics_0.1.0         farver_2.1.0          
[41] zoo_1.8-9              jsonlite_1.7.2         magrittr_2.0.1         Matrix_1.3-4           Rcpp_1.0.7            
[46] munsell_0.5.0          fansi_0.5.0            lifecycle_1.0.0        stringi_1.7.3          yaml_2.2.1            
[51] grid_4.1.2             parallel_4.1.2         crayon_1.4.1           lattice_0.20-45        haven_2.4.1           
[56] splines_4.1.2          hms_1.1.0              tmvnsim_1.0-2          knitr_1.33             pillar_1.6.1          
[61] codetools_0.2-18       fastmatch_1.1-3        reprex_2.0.0           glue_1.4.2             tableone_0.13.0       
[66] evaluate_0.14          mitools_2.4            data.table_1.14.0      modelr_0.1.8           png_0.1-7             
[71] vctrs_0.3.8            tzdb_0.1.2             cellranger_1.1.0       gtable_0.3.0           qrencoder_0.1.0       
[76] assertthat_0.2.1       xfun_0.24              broom_0.7.9            survey_4.1-1           e1071_1.7-8           
[81] viridisLite_0.4.0      class_7.3-19           survival_3.2-13        ellipsis_0.3.2         here_1.0.1            

References

LS0tCnRpdGxlOiAiVGhlIHRpdGxlIgpkYXRlOiAiYHIgZm9ybWF0KFN5cy50aW1lKCksICclZCAlQiwgJVknIClgIgpsYW5nOiBlbgphdXRob3I6IGF1dGhvciBuYW1lCmFic3RyYWN0OiBUaGlzIGlzIGFuIGFic3RyYWN0Cm51bWJlci1zZWN0aW9uczogdHJ1ZQpvdXRwdXQ6CiAgaHRtbF9ub3RlYm9vazoKICAgIGNvZGVfZm9sZGluZzogaGlkZQogICAgaGlnaGxpZ2h0OiB6ZW5idXJuCiAgICB0aGVtZTogZmxhdGx5CiAgICB0b2M6IHllcwogICAgdG9jX2Zsb2F0OiB5ZXMKICAgIGRmX3ByaW50OiBwYWdlZCAgICAKICAgIGNvZGVfZG93bmxvYWRpbmc6IHllcwojIEJpYmxpb2dyYXBoeQojIGJpYmxpb2dyYXBoeTogIlk6L1Byb2plY3RzL05hbWUvZG9jL2xpYnJhcnkuYmliIgotLS0KCmBgYHtyIHNldHVwLCBpbmNsdWRlPUZBTFNFfQpsaWJyYXJ5KHRpZHl2ZXJzZSkKCnNldC5zZWVkKDQyKQpzZXR3ZChoZXJlOjpoZXJlKCkpICMgbmVlZGVkIGFzIHdlIGFyZSBpbiAvc3JjLCBpbiBsaW51eCBoZXJlKCkgc2hvdWxkIGJlIHVzZWQKCiMgTG9hZGluZyB0aGUgcHJvamVjdApQcm9qZWN0VGVtcGxhdGU6OmxvYWQucHJvamVjdCgpCnBjbGVhbigpCgojIFIgb3B0aW9ucwpvcHRpb25zKAogIGRpZ2l0cyA9IDIsICMgT25seSB0d28gZGVjaW1hbCBkaWdpdHMKICBzY2lwZW4gPSA5OTkgIyBSZW1vdmUgc2NpZW50aWZpYyBub3RhdGlvbiBmb3IgcHJldHR5IHByaW50aW5nCikKCiMgS25pdHIgb3B0aW9ucwprbml0cjo6b3B0c19jaHVuayRzZXQoCiAgY29tbWVudCA9IE5BLCAjIHJlbW92ZSBjb21tZW50IHN5bWJvbAogIGNhY2hlLnBhdGggPSAiLi4vY2FjaGUvIiwgIyB3aGVyZSBzaG91bGQgSSBzYXZlIGNhY2hlPwogIGZpZy5wYXRoID0gIi4uL2dyYXBocy8iLCAjIHdoZXJlIHNob3VsZCBJIHNhdmUgZmlndXJlcz8KICBlY2hvID0gVCwgIyBkb250IGVjaG8gYnkgZGVmYXVsdAogIGNhY2hlID0gRiwgIyBkb250IGNhY2hlIGJ5IGRlZmF1bHQKICBmaWcud2lkdGggPSAxMCwgIyBzZXR0aW5nIHRoZSBiZXN0IHdpdGR0aCBmb3IgZmlndXJlcwogIGZpZy5oZWlnaHQgPSA3LCAjIGJlc3QgaGVpZ2h0CiAgZHBpID0gMzAwLCAjIGhpZ2ggZHBpIGZvciBwdWJsaWNhdGlvbiBxdWFsaXR5CiAgZXJyb3IgPSBGLAogIHdhcm5pbmcgPSBGCikKYGBgCgoKIyBJbnRyb2R1Y3Rpb24KClNvbWUgcmV2aWV3IG9mIHRoZSBzdWJqZWN0IGFuZCB0aGUgbGlzdCBvZiBoeXBvdGhlc2VzIGRpc2N1c3NlZCBhdCBtZWV0aW5ncy4KCiMgVGFibGUgMQoKCmBgYHtyfQpkaWFtb25kcyAlPiUKICB0YWJsZW9uZTo6Q3JlYXRlVGFibGVPbmUoCiAgICBkYXRhID0gLiwKICAgIGluY2x1ZGVOQSA9IFQsCiAgICAjIHN0cmF0YSA9ICJ2aXNpdCIsCiAgICBhZGRPdmVyYWxsID0gVAogICkgJT4lCiAgdGFibGVvbmU6OmthYmxlb25lKCkKYGBgCgoKIyBCYXNpYyBkZXNjcmlwdGl2ZSBjaGFyYWN0ZXJpc3RpY3MKCiMjIERpc3RyaWJ1dGlvbiBvZiBhZ2UgYnkgc2V4CgpgYGB7
ciBhZ2UtYnktc2V4LCByZXN1bHRzPSJoaWRlIn0KZGlhbW9uZHMgJT4lCiAgZ2dwbG90KGFlcyhwcmljZSwgZmlsbCA9IGNvbG9yKSkgKwogIGdlb21fZGVuc2l0eShhbHBoYSA9IC4zKSArCiAgbGFicygKICAgIHRpdGxlID0gIkFnZSBNYWxlIHggRmVtYWxlIiwgeCA9ICJBZ2UiLAogICAgeSA9ICJEZW5zaXR5IgogICkgKyB0aGVtZV9saW5lZHJhdygpCmBgYAoKCgojIyBNaXNzaW5nIGFuZCBjYXRlZ29yaWVzIGFuZCBkaXN0cmlidXRpb24gaW4gb25lIHBpY3R1cmUKYGBge3IgdGFicGxvdCwgZmlnLmNhcD0iTWlzc2luZ3MsIGNhdGVnb3JpZXMgYW5kIGRpc3RyaWJ1dGlvbnMifQp0YWJwbG90Ojp0YWJsZXBsb3QoZGlhbW9uZHMpCmBgYAoKIyMgQW55IGNvcnJlbGF0ZWQ/CgoKYGBge3Igc2NhdHRlcn0KZGlhbW9uZHMgJT4lCiAgc2VsZWN0X2lmKGlzX251bWVyaWMpICU+JQogIHBzeWNoOjpwYWlycy5wYW5lbHMoLiwKICAgIG1ldGhvZCA9ICJwZWFyc29uIiwgIyBjb3JyZWxhdGlvbiBtZXRob2QKICAgIGhpc3QuY29sID0gIiMwMEFGQkIiLAogICAgZGVuc2l0eSA9IFRSVUUsICMgc2hvdyBkZW5zaXR5IHBsb3RzCiAgICBlbGxpcHNlcyA9IFRSVUUgIyBzaG93IGNvcnJlbGF0aW9uIGVsbGlwc2VzCiAgKQpgYGAKCgojIyBNb3JlIGluZm8gb24gZGlzdHJpYnV0aW9uIHdpdGggYm94cGxvdHMKCmBgYHtyIGJveHBsb3RzfQpkaWFtb25kcyAlPiUKICBzZWxlY3RfaWYoaXMubnVtZXJpYykgJT4lCiAgZ2F0aGVyKGtleSA9ICJpbmQiLCB2YWx1ZSA9ICJ2YWx1ZXMiKSAlPiUKICBnZ3Bsb3QoYWVzKHggPSBpbmQsIHkgPSB2YWx1ZXMpKSArCiAgZ2VvbV9ib3hwbG90KCkgKwogIGNvb3JkX2ZsaXAoKSArCiAgdGhlbWVfbWluaW1hbCgpICsKICBzY2FsZV9maWxsX2dyZXkoKQpgYGAKCgoKIyBTeXN0ZW0gaW5mb3JtYXRpb24KCmBgYHtyfQpwcm9qZWN0LmluZm8KYGBgCgpgYGB7cn0Kc2Vzc2lvbkluZm8oKQpgYGAKCiMgUmVmZXJlbmNlcwo=